import base64

from common.logger import get_logger
from impacket.dpapi import DPAPI_BLOB
from impacket.uuid import bin_to_string
from pydantic import BaseModel

logger = get_logger(__name__)


class ParsedDpapiBlob(BaseModel):
    """Result of attempting to parse a single DPAPI blob."""

    # Lower-cased string form of the blob's "GuidMasterKey" field; stays "" until a parse succeeds
    dpapi_master_key_guid: str | None = ""
    # Base64 (UTF-8 text) encoding of the carved blob bytes; None until a parse succeeds
    dpapi_data_b64: str | None = None
    # The carved blob bytes themselves; None until a parse succeeds
    dpapi_blob_raw: bytes | None = None
    success: bool = False  # true/false if parsing was successful or not
    blob_offset: int = 0  # offset in the file where the blob was found


async def parse_dpapi_blob(blob_bytes: bytes) -> ParsedDpapiBlob:
    """
    Parse a single DPAPI blob from the start of ``blob_bytes``.

    Args:
        blob_bytes: Bytes that begin with a DPAPI blob; trailing bytes are allowed.

    Returns:
        A ParsedDpapiBlob. When the parsed structure exposes raw data, the result
        carries the lower-cased master key GUID, the carved blob (raw and base64)
        and ``success=True``; otherwise the default (unsuccessful) model is returned.
    """
    result = ParsedDpapiBlob()
    structure = DPAPI_BLOB(blob_bytes)

    if structure.rawData is None:
        return result

    # Carving _just_ the blob: cut rawData down to the length of the structure's
    # own serialized form so trailing bytes from the input are dropped.
    serialized_len = len(structure.getData())
    structure.rawData = structure.rawData[:serialized_len]
    carved = structure.rawData

    result.dpapi_master_key_guid = bin_to_string(structure["GuidMasterKey"]).lower()
    result.dpapi_data_b64 = base64.b64encode(carved).decode("utf-8")
    result.dpapi_blob_raw = carved
    result.success = True
    return result


async def carve_dpapi_blobs_from_bytes(
    raw_bytes: bytes, file_name: str = "", object_id: str = "", base_offset: int = 0
) -> list[ParsedDpapiBlob]:
    """
    Carve binary and base64-encoded DPAPI blobs out of a byte buffer.

    Two independent passes are made over the whole buffer: one for the binary
    DPAPI signature and one for each base64 form of that signature. Blobs are
    de-duplicated on their base64 data, so a blob found by both passes (or more
    than once) is returned a single time.

    A signature hit that fails to parse is logged and skipped; it does not stop
    the scan, and every pass is guaranteed to move forward after each hit.

    Args:
        raw_bytes: The bytes to search for DPAPI blobs
        file_name: Optional file name for logging
        object_id: Optional object ID for logging
        base_offset: The base offset in the original file (for chunked reading)

    Returns:
        The successfully parsed, unique blobs as ParsedDpapiBlob models, with
        ``blob_offset`` set to ``base_offset`` plus the position of the hit.
    """
    dpapi_blobs = []
    seen_blobs = set()  # Track unique blobs by their base64 data
    dpapi_signature = b"\x01\x00\x00\x00\xd0\x8c\x9d\xdf\x01\x15\xd1\x11\x8c\x7a\x00\xc0\x4f\xc2\x97\xeb"

    # The following are potential base64 representations of the DPAPI provider GUID
    #   Generated by putting dpapiProviderGuid into the script here: https://www.leeholmes.com/blog/2017/09/21/searching-for-content-in-base-64-strings/
    dpapi_b64_signatures = [b"AAAA0Iyd3wEV0RGMegDAT8KX6", b"AQAAANCMnd8BFdERjHoAwE/Cl+", b"EAAADQjJ3fARXREYx6AMBPwpfr"]

    # Byte values that may appear inside a base64 run (built once, O(1) membership)
    b64_alphabet = frozenset(b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=")
    buffer_len = len(raw_bytes)

    # Pass 1: binary blobs
    loc = raw_bytes.find(dpapi_signature)
    while loc != -1:
        # Default to stepping one byte past this hit so the scan always advances,
        # whatever happens while parsing it.
        next_pos = loc + 1
        # parse the blob so we get the masterkey GUID and carve the data into one blob
        try:
            blob = await parse_dpapi_blob(raw_bytes[loc:])
            if not blob.success:
                if file_name != "" and object_id != "":
                    logger.warning(
                        "carve_dpapi_blobs_from_bytes: blob.rawData is None",
                        file_name=file_name,
                        object_id=object_id,
                    )
                else:
                    logger.warning("carve_dpapi_blobs_from_bytes: blob.rawData is None")
            elif blob.dpapi_data_b64:
                blob.blob_offset = base_offset + loc
                # Jump over the carved blob (never less than one byte)
                next_pos = loc + max(1, len(base64.b64decode(blob.dpapi_data_b64)))
                if blob.dpapi_data_b64 not in seen_blobs:
                    seen_blobs.add(blob.dpapi_data_b64)
                    dpapi_blobs.append(blob)
        except Exception as e:
            # A bad hit must not hide the remaining blobs: log it and keep scanning
            if file_name != "":
                logger.warning(f"exception parsing file {file_name} for dpapi blobs: {e}")
            else:
                logger.warning(f"exception parsing bytes for dpapi blobs: {e}")
        loc = raw_bytes.find(dpapi_signature, next_pos)

    # Pass 2: base64-encoded blobs. Each signature is searched from the start of
    # the buffer; the position reached by the binary pass is irrelevant here.
    for dpapi_b64_signature in dpapi_b64_signatures:
        loc = raw_bytes.find(dpapi_b64_signature)
        while loc != -1:
            # The signature itself is all base64 characters, so begin looking for
            # the end of the run right after it. A run that is never terminated
            # extends to the end of the buffer.
            scan_start = loc + len(dpapi_b64_signature)
            end_loc = next(
                (i for i in range(scan_start, buffer_len) if raw_bytes[i] not in b64_alphabet),
                buffer_len,
            )
            try:
                dpapi_blob_raw = base64.b64decode(raw_bytes[loc:end_loc])
                blob = await parse_dpapi_blob(dpapi_blob_raw)
                if not blob.success:
                    logger.warning(
                        "carve_dpapi_blobs: blob.rawData is None",
                        file_name=file_name,
                        object_id=object_id,
                    )
                elif blob.dpapi_data_b64:
                    blob.blob_offset = base_offset + loc
                    if blob.dpapi_data_b64 not in seen_blobs:
                        seen_blobs.add(blob.dpapi_data_b64)
                        dpapi_blobs.append(blob)
            except Exception as e:
                # e.g. a run with invalid base64 padding: log it and keep scanning
                if file_name != "":
                    logger.warning(f"exception parsing file {file_name} for b64dpapi blobs: {e}")
                else:
                    logger.warning(f"exception parsing bytes for b64dpapi blobs: {e}")
            # end_loc is always past loc, so resuming there cannot revisit this hit
            loc = raw_bytes.find(dpapi_b64_signature, end_loc)

    return dpapi_blobs


async def carve_dpapi_blobs_from_file(file_name: str, object_id: str = "", max_blobs: int = 1000) -> list[dict]:
    """
    Carve raw DPAPI blobs from a file, reading it in fixed-size chunks.

    Each chunk is handed to carve_dpapi_blobs_from_bytes on its own, with the
    chunk's position in the file passed as the base offset so that reported
    blob offsets are relative to the start of the file.

    NOTE: because chunks are carved independently, a blob that straddles a
    chunk boundary is never seen whole by the carver.

    Args:
        file_name: Path of the file to read
        object_id: Optional object ID for logging
        max_blobs: Upper bound on the total number of blobs returned for the file;
            reading stops as soon as this many have been collected

    Returns:
        A list of at most ``max_blobs`` dicts, each the ``model_dump()`` of a parsed blob.
    """

    dpapi_blobs = []
    chunk_size = 512000
    current_offset = 0

    with open(file_name, "rb") as f:
        # chunking to handle large files; stop early once the cap is reached
        while len(dpapi_blobs) < max_blobs and (chunk := f.read(chunk_size)):
            blobs = await carve_dpapi_blobs_from_bytes(chunk, file_name, object_id, base_offset=current_offset)
            # Only take as many as still fit under the file-wide cap
            remaining = max_blobs - len(dpapi_blobs)
            dpapi_blobs += [blob.model_dump() for blob in blobs[:remaining]]
            current_offset += len(chunk)

    return dpapi_blobs
